#!/bin/bash -e

# Print a message on stderr, followed by a newline.
# Arguments: the words of the message; they are joined with single spaces.
# Outputs:   the message on stderr; nothing on stdout.
log() {
	# Pin the separator used by "$*" so the output does not depend on the caller's IFS.
	local IFS=' '
	# printf instead of echo: a message that starts with -n, -e or -E is printed
	# literally instead of being interpreted as an echo option.
	printf '%s\n' "$*" 1>&2
}

# Report a fatal error and terminate.
# Arguments: the error message, handed to log unchanged.
# Outputs:   the message, then the line "Aborting.", both through log.
# Does not return: ends the current shell with exit status 1.
abort() {
	log "$@"
	log "Aborting."
	exit 1
}

# Work out the download URL of the dump by scraping the remote directory listing.
# Outputs:   the full URL (directory URL + first matching file name) on stdout.
# Returns:   0 when a file name was found. Otherwise calls abort, which exits
#            the current shell with status 1 (only the subshell when this
#            function is used inside a command substitution).
guess_remote_dump_url() {
	local remote_dump_dir_url="https://dumps.wikimedia.org/enwiki/latest/"
	local remote_dump_regex="href=\"(enwiki-latest-pages-meta-current1.xml[^\"]+.bz2)\""
	local failure_message="Could not guess the URL to get the remote dump from."
	local listing dump_file_name

	# Fetch the listing on its own so that a curl failure is actually noticed;
	# at the head of a pipeline its exit status would be discarded.
	# -f makes curl report HTTP error responses as failures.
	listing=$(curl -sf "$remote_dump_dir_url") || abort "$failure_message"

	# Keep the first link that matches the expected file name pattern.
	dump_file_name=$(
		printf '%s\n' "$listing" \
			| grep -E "$remote_dump_regex" \
			| sed -E "s/.*${remote_dump_regex}.*/\\1/" \
			| head -n 1
	) || dump_file_name=""

	# No match means there is nothing to download: fail instead of printing
	# the bare directory URL.
	[ -n "$dump_file_name" ] || abort "$failure_message"

	printf '%s%s\n' "$remote_dump_dir_url" "$dump_file_name"
}

# Run psql in a throw-away postgres:11.1 docker container linked to the
# database container, feeding it whatever arrives on stdin.
# Globals:   DOCKER_CONTAINER_NAME (read) - container to link as host "postgres"
#            DOCKER_NETWORK_NAME (read)   - docker network to join; skipped when empty
# Inputs:    stdin is passed through to psql (docker run -i).
# Returns:   the exit status of the sudo/docker command.
do_psql() {
	# Build the docker options as an array so that every value stays a single
	# argument, whatever characters it contains.
	local -a docker_options=(
		--link "${DOCKER_CONTAINER_NAME}:postgres"
		-e PGPASSWORD=hibernate_demo
	)
	if [ -n "$DOCKER_NETWORK_NAME" ]
	then
		docker_options+=(--net "$DOCKER_NETWORK_NAME")
	fi
	sudo -p "Sudo password (to execute docker): " \
		docker run -i --rm "${docker_options[@]}" \
		postgres:11.1 psql -h postgres -U hibernate_demo hsearch_es_wikipedia
}


# Stop at the first failing command even when the script is started as
# "bash <script>", in which case the "-e" on the shebang line is not applied.
set -e

# xsltproc is only used by the last step of the script; check for it up front
# so that a missing tool is reported before anything is downloaded.
# "command -v" is a shell builtin, so the check does not depend on the external "which".
command -v xsltproc 1>/dev/null 2>&1 || abort "Command 'xsltproc' isn't in the \$PATH; please install the command or add it to the \$PATH."

# Defaults; each one can be overridden from the command line:
#   -c <name>  name of the docker container to connect to
#   -n <name>  name of the docker network
#   -f <path>  path of a local dump file (stays empty when not given)
DOCKER_CONTAINER_NAME="hsearch-elasticsearch-wikipedia_pg01_1"
DOCKER_NETWORK_NAME="hsearch-elasticsearch-wikipedia_pgnet"
LOCAL_DUMP_FILE=
while getopts 'c:n:f:' opt; do
	case "$opt" in
		c) DOCKER_CONTAINER_NAME="$OPTARG" ;;
		n) DOCKER_NETWORK_NAME="$OPTARG" ;;
		f) LOCAL_DUMP_FILE="$OPTARG" ;;
		# getopts has already printed its own diagnostic at this point.
		\?) exit 1 ;;
	esac
done
# Remove the options that were just parsed from the positional parameters.
shift "$(( OPTIND - 1 ))"

log "Will attempt to connect to docker container with name '$DOCKER_CONTAINER_NAME' within network '$DOCKER_NETWORK_NAME'"

# Settle on the dump file to load: either the one given with -f, or a copy kept
# under /tmp that is downloaded and uncompressed on first use.
if [ -z "$LOCAL_DUMP_FILE" ]
then
	log "No dump file path given; will try to retrieve the dump file automatically."
	LOCAL_DUMP_DIR="/tmp/hsearch_es_wikipedia"
	LOCAL_DUMP_FILE="$LOCAL_DUMP_DIR/enwiki-latest-pages-meta-current1.xml"

	if [ -e "$LOCAL_DUMP_FILE" ]
	then
		log "Found cached file from previous initialization at '$LOCAL_DUMP_FILE'; using this cached file."
	else
		# The function runs in a subshell here, so an abort inside it only ends
		# that subshell; stop explicitly when it reports a failure.
		REMOTE_DUMP_URL=$(guess_remote_dump_url) || exit 1
		# Refuse anything that is not a .bz2 file (for instance a bare directory
		# URL): bunzip2 could not handle the download anyway.
		case "$REMOTE_DUMP_URL" in
			*.bz2) ;;
			*) abort "Could not guess the URL to get the remote dump from." ;;
		esac
		LOCAL_COMPRESSED_DUMP_FILE="$LOCAL_DUMP_FILE.bz2"
		mkdir -p "$LOCAL_DUMP_DIR"
		log "Attempting to retrieve the dump file from '$REMOTE_DUMP_URL'..."
		# --progress-bar is the documented name of the option; --fail turns an
		# HTTP error response into a failure instead of saving the error page.
		curl --fail --progress-bar -o "$LOCAL_COMPRESSED_DUMP_FILE" "$REMOTE_DUMP_URL" \
			|| abort "Could not retrieve the dump file from '$REMOTE_DUMP_URL'."
		log "... done. Uncompressing..."
		bunzip2 "$LOCAL_COMPRESSED_DUMP_FILE" \
			|| abort "Could not uncompress '$LOCAL_COMPRESSED_DUMP_FILE'."
		log "... done."
	fi
else
	# The dump file is passed with -f; positional arguments are not read.
	[ -f "$LOCAL_DUMP_FILE" ] || abort "Dump file '$LOCAL_DUMP_FILE' does not exist or is not a regular file. Usage: ./init.sh [-c <docker container name>] [-n <docker network name>] [-f <mediawiki XML dump>]"
fi

# Directory that holds this script, with symlinks resolved; the stylesheet is
# looked up next to it. Nested command substitutions (rather than piping
# through xargs) keep a path containing whitespace in one piece.
DIR=$(dirname -- "$(readlink -f -- "$0")")

# Apply the stylesheet to the dump and stream the result into psql.
# The dump path is quoted because it can come straight from the -f option.
# Note: the exit status of this pipeline is the one of do_psql.
xsltproc "$DIR/sql_populate.xslt" "$LOCAL_DUMP_FILE" | do_psql

